Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Tabula Muris Data

This will generate the Tabulamuris data used.

The TabulaMurisData was of huge help here !



In [1]:

    
library(Seurat)
suppressPackageStartupMessages({
    library(ExperimentHub)
    library(SingleCellExperiment)
    library(TabulaMurisData)
})
#> snapshotDate(): 2018-10-31
library(scater)
library(scran)
library(ggplot2)









    



snapshotDate(): 2018-10-30
Loading required package: ggplot2

Attaching package: 'scater'

The following object is masked from 'package:S4Vectors':

    rename

The following object is masked from 'package:stats':

    filter



In [2]:

    
run_qc <- function(sce) {
  isSpike(sce, "ERCC") <- grepl("^ERCC", rownames(sce))
  sce <- calculateQCMetrics(sce)

  # Identify outliers, but without using the mouse as a batch
  libsize.drop <- isOutlier(sce$total_counts, nmads=3, type="lower", log=TRUE)
  feature.drop <- isOutlier(sce$total_features_by_counts, nmads=3, type="lower", log=TRUE)
  spike.drop <- isOutlier(sce$pct_counts_ERCC, nmads=3, type="higher")
  keep <- !(libsize.drop | feature.drop | spike.drop)
  sce <- sce[,keep]

  num.cells <- nexprs(sce, byrow=TRUE)
  to.keep <- num.cells > 0
  sce <- sce[to.keep,]
  sce
}



In [3]:

    
eh <- ExperimentHub()
#> snapshotDate(): 2018-10-31
query(eh, "TabulaMurisData")









    



snapshotDate(): 2018-10-30






    





ExperimentHub with 2 records
# snapshotDate(): 2018-10-30 
# $dataprovider: Tabula Muris Consortium
# $species: Mus musculus
# $rdataclass: SingleCellExperiment
# additional mcols(): taxonomyid, genome, description,
#   coordinate_1_based, maintainer, rdatadateadded, preparerclass, tags,
#   rdatapath, sourceurl, sourcetype 
# retrieve records with, e.g., 'object[["EH1617"]]' 

           title               
  EH1617 | TabulaMurisDroplet  
  EH1618 | TabulaMurisSmartSeq2



In [4]:

    
sce <- eh[["EH1618"]]
sce <- sce[, sce$tissue == 'Skin' |
    sce$tissue == 'Large_Intestine' |
    sce$tissue == 'Spleen' |
    sce$tissue == 'Brain_Myeloid']

sce <- run_qc(sce)









    



see ?TabulaMurisData and browseVignettes('TabulaMurisData') for documentation
downloading 0 resources
loading from cache 
    '/root//.ExperimentHub/1618'



In [5]:

    
sce$label <- sce$tissue
sce$subtissue[is.na(sce$subtissue)] <- 'None'
sce$cell_ontology_class[is.na(sce$cell_ontology_class)] <- 'None'                              
sce$cell_ontology_id[is.na(sce$cell_ontology_id)] <- 'None'                                    
sce$free_annotation[is.na(sce$free_annotation)] <- 'None'                                      
sce$FACS_selection[is.na(sce$FACS_selection)] <- 'None' 
write.csv(as.matrix(counts(sce)), 'tm_tissue_mix.12k.counts.csv')
write.csv(colData(sce), 'tm_tissue_mix.12k.metadata.csv')
write.csv(rowData(sce), 'tm_tissue_mix.12k.featuredata.csv')
saveRDS(sce, paste('cell_mix/tm_tissue_mix.12k', 'rds', sep='.'))



In [6]:

    
sce









    





class: SingleCellExperiment 
dim: 22872 12081 
metadata(0):
assays(1): counts
rownames(22872): 0610005C13Rik 0610007C21Rik ... Zzef1 Zzz3
rowData names(10): ID Symbol ... total_counts log10_total_counts
colnames(12081): A1.B000126.3_39_F.1.1 A1.B000826.3_39_F.1.1 ...
  P9.MAA001869.3_38_F.1.1 P9.MAA001871.3_39_F.1.1
colData names(48): cell plate_barcode ...
  pct_counts_in_top_500_features_ERCC label
reducedDimNames(0):
spikeNames(1): ERCC



In [46]:

    
dat <- as.Seurat(sce, counts='counts', data='counts')
dat <- NormalizeData(dat)
dat <- FindVariableFeatures(dat)
all.genes <- rownames(dat)
dat <- ScaleData(dat, features = all.genes)
dat <- RunPCA(dat)









    



Centering and scaling data matrix






    



PC_ 1 
Positive:  Lgals4, Cldn7, Epcam, Krt8, Tspan1, Cldn3, Tspan8, Krt19, Tmem45b, Elf3 
	   Muc13, Ceacam1, 2210415F13Rik, Oit1, Pigr, Slc44a4, Car2, Phgr1, Ckmt1, Ethe1 
	   B4galnt2, Mgst3, Krt18, Gale, 9130017N09Rik, Tsc22d1, Cdhr5, Guca2a, Klk1, Gmds 
Negative:  Csf1r, Cx3cr1, Olfml3, Rac2, Itgam, Rab3il1, Mpeg1, Lrrc3, Hvcn1, Pros1 
	   Fgd2, Il10ra, Gna15, Cd52, Tbxas1, Stab1, Slc29a3, Nckap1l, Ccr5, Abca9 
	   Ets1, Il21r, Havcr2, Sash3, Lyz2, Slc7a8, Cd33, Slc11a1, Cd300a, C3ar1 
PC_ 2 
Positive:  Csf1r, Cx3cr1, Olfml3, Rac2, Ceacam1, Cd52, Plac8, Itgam, Hvcn1, Lgals4 
	   Rab3il1, Mpeg1, Krt8, Cldn3, Cldn7, Cnp, Tspan1, Il10ra, Fgd2, 2210415F13Rik 
	   Muc13, Lrrc3, Krt19, Sash3, Tmem45b, Lsp1, Tbxas1, Nckap1l, Phgr1, Pglyrp1 
Negative:  Lgals7, Krt5, Trim29, Krt15, Anxa8, Dmkn, Col17a1, Hspb1, Crip2, Perp 
	   Apoe, Anxa1, 1190003J15Rik, Serpinb5, Ptrf, Tgfbi, Ecm1, Mt2, Cxcl14, Sfn 
	   Aqp3, Anxa2, Gsn, Cav1, Gata3, Mt1, Ptn, Krt14, Sbsn, Ccl27a 
PC_ 3 
Positive:  Ptprcap, Cd79a, Ltb, H2-Aa, Il2rg, H2-Eb1, H2-Ab1, Cd74, Cd52, Ms4a1 
	   H2-DMb2, Faim3, Cd19, Cd79b, Sell, Sp100, Cd2, Ly6d, Napsa, Cnn2 
	   Ppp1r16b, Tbc1d10c, Gimap6, Gimap4, Gimap3, Tnfrsf13c, Dusp2, Blk, Ccr7, Lsp1 
Negative:  Cx3cr1, Csf1r, Olfml3, Itgam, Rab3il1, Lrrc3, Pros1, Mpeg1, Tbxas1, Stab1 
	   Abca9, Slc29a3, Slc7a8, Ccr5, Gna15, Havcr2, Cd33, 1500010J02Rik, Slc11a1, Plod1 
	   C3ar1, Emr1, 9830001H06Rik, Cd300a, Abhd15, Lyz2, C5ar1, 1700017B05Rik, Slc46a1, Tspan4 
PC_ 4 
Positive:  Agr2, Spink4, Hpd, Atp2c2, Gm1123, Hepacam2, Slc12a8, Spdef, Tpsg1, Creb3l4 
	   Atoh1, Ern2, Bace2, Qsox1, Pdia5, Ccl6, 1810030J14Rik, Gpr20, Nupr1, Hgfac 
	   Reg4, Bcas1, Sval1, Ramp1, Cd177, Lrrc26, Rep15, Best2, Klk1, Ceacam10 
Negative:  Cyp2c55, 1810065E05Rik, Ces2e, Car1, Ces2c, Prss30, Krt20, Mep1a, 2200002D01Rik, Sectm1b 
	   Ces2a, Mgat4c, 1110032A04Rik, Cyp3a13, Dpep1, Tmigd1, Car4, Slc26a3, Mettl7b, Osta 
	   Aldh1a1, Slc13a2, Cyp2c65, Fabp2, Fa2h, Hsd3b3, Sult1b1, Ceacam20, Mal, Cdhr2 
PC_ 5 
Positive:  Clec2h, Rep15, Gm1123, Tpsg1, Ramp1, Sval1, Hpd, Lrrc26, Hgfac, Krt20 
	   Bcas1, Hepacam2, Atp2c2, Spink4, Fhl1, Cgref1, Spdef, Best2, Ccl6, Gpr20 
	   1810030J14Rik, Zg16, Clca3, Atoh1, Guca2a, Mxd1, Saa1, Tff3, Slc12a8, Clps 
Negative:  Stmn1, Ccna2, Birc5, Cdca3, Ube2c, Ccnb2, Sult1a1, Mgst1, Clca4, Cdc20 
	   Top2a, Cdk1, Kcne3, Ccnb1, Mki67, Gstm3, Pbk, Gpx2, Aadac, Aldob 
	   Kif22, Racgap1, Cenpf, Aurkb, Ugt2b5, Kif20a, Hao2, Cenpa, Fads2, Gsta4



In [47]:

    
DimPlot(dat, reduction = 'pca', group.by = 'tissue')



In [10]:

    
DimPlot(subset(dat, subset = tissue == 'Thymus' | tissue == 'Spleen'), reduction = 'pca', group.by = 'cell_ontology_class')



In [16]:

    
DimPlot(subset(dat, subset = tissue == 'Thymus' | tissue == 'Spleen'), reduction = 'pca', group.by = 'tissue')



In [11]:

    
DimPlot(subset(dat, subset = tissue == 'Skin' | tissue == 'Tongue'), reduction = 'pca', group.by = 'cell_ontology_class')



In [15]:

    
DimPlot(subset(dat, subset = tissue == 'Skin' | tissue == 'Tongue'), reduction = 'pca', group.by = 'tissue')



In [48]:

    
dat <- RunUMAP(dat, dims=1:10)



In [49]:

    
DimPlot(dat, reduction = 'umap', group.by = 'tissue')



In [20]:

    
DimPlot(subset(dat, subset = tissue == 'Skin' | tissue == 'Tongue'), reduction = 'umap', group.by = 'cell_ontology_class')



In [24]:

    
DimPlot(subset(dat, subset = tissue == 'Skin' | tissue == 'Tongue'), reduction = 'umap', group.by = 'tissue')



In [25]:

    
DimPlot(subset(dat, subset = tissue == 'Spleen' | tissue == 'Thymus'), reduction = 'umap', group.by = 'cell_ontology_class')



In [44]:



In [50]:



In [5]:

    
sce$tissue <- as.factor(sce$tissue)
summary(sce$tissue)









    





	Brain_Myeloid
		4762
	Large_Intestine
		4149
	Skin
		2464
	Spleen
		1718